#Figure 4 - Permutation test
library(dplyr)
library(ggplot2)
library(tidyverse)
library(gridExtra)


rm(list= ls())
#### Load dataset ####
dataset <- readRDS("dataset_leip_dataverse.rds")

#### PREP DATA ####

## Find pcteligible2013 above 50th percentile to create Z

dataset %>%
  mutate(
    Z = pcteligible2013 >= median(pcteligible2013)
  ) -> dataset

## Create interaction variables with Z
dataset %>%
  mutate(Zx = Z*x) %>%
  mutate(Zz = Z*z) %>%
  mutate(Zzx = Z*zx) -> dataset_processed

##Find where abs(x) < 100

reg_filter <- abs(dataset$x)<100

##Fit regressions

fit_reg_20142010 <- lm(
  dregistration20142010 ~ 
    z +
    Z +
    Zz +
    x +
    zx +
    Zx +
    Zzx,
  data=dataset_processed[reg_filter,]
)

fit_turn_20142010 <- lm(
  dturnout20142010 ~ 
    z +
    Z +
    Zz +
    x +
    zx +
    Zx +
    Zzx,
  data=dataset_processed[reg_filter,]
)

fit_reg_20162012 <- lm(
  dregistration20162012 ~ 
    z +
    Z +
    Zz +
    x +
    zx +
    Zx +
    Zzx,
  data=dataset_processed[reg_filter,]
)

fit_turn_20162012 <- lm(
  dturnout20162012 ~ 
    z +
    Z +
    Zz +
    x +
    zx +
    Zx +
    Zzx,
  data=dataset_processed[reg_filter,]
)

est1z <- as.double(coef(fit_reg_20142010)["Zz"])
est2z <- as.double(coef(fit_turn_20142010)["Zz"])
est3z <- as.double(coef(fit_reg_20162012)["Zz"])
est4z <- as.double(coef(fit_turn_20162012)["Zz"])

sim_coefficients <- tibble(
  index = seq(1,1000),
  B1 = rep(0,1000),
  B2 = rep(0,1000),
  B3 = rep(0,1000),
  B4 = rep(0,1000)
)

dataset_processed <- tibble::rowid_to_column(dataset_processed, "index_preserved")

## dataset_processed is undamaged, before each simulation iteration (preserve / restore cycle)

## Import distance for placebo dataset

load("D:/Replication/dataset_distanceforplacebo.RData")

dataset_distanceforplacebo <- x

merge(dataset_processed, dataset_distanceforplacebo, by= 'fips',  all.x = TRUE) %>%
  select(-one_of(c('border.y', 'fipsstate.y', 'stateabbr.y', 'z.y'))) %>%
  rename(border = border.x, fipsstate = fipsstate.x, stateabbr = stateabbr.x, z = z.x) -> dataset_merged
### Run 1000 simulations ###

for(i in 1:1000){
  
  
  dataset_merged %>%
    group_by(fipsstate) %>%
    summarise(z = mean(z)) -> dataset_col
  
  dataset_col["rannum"] <- runif(length(dataset_col$fipsstate))
  
  dataset_col <-arrange(dataset_col, rannum)
  
  dataset_col <- tibble::rowid_to_column(dataset_col, "index")
  
  dataset_col %>%
    mutate(z2 = index > 21) -> dataset_col
  
  
  dataset_sim <- merge(dataset_col, dataset_merged)
  
  
  ###
  dataset_sim %>%
    mutate(z = z2) -> dataset_sim
  
  
  dataset_sim %>% 
    group_by(border) %>%
    summarise(borderz = mean(z)) -> borderz_group
  
  borderz_group<-borderz_group[!(borderz_group$borderz==0 | borderz_group$borderz==1),]
  
  dataset_sim <- merge(dataset_sim, borderz_group)
  if(all(is.na(borderz_group))){
    stop("2 - border")
  }
  
  ###
  
  dataset_sim %>%
    mutate(x = abs(distance)) %>%
    mutate(x = ifelse(z==0, -x, x)) %>%
    mutate(zx = z*x) %>%
    mutate(
      Z = pcteligible2013 >= median(pcteligible2013)
    ) %>%
    mutate(Zx = Z*x) %>%
    mutate(Zz = Z*z) %>%
    mutate(Zzx = Z*zx) %>%
    mutate(X = abs(x))-> dataset_sim
  
  dataset_sim %>%
    group_by(fips) %>%
    summarise(mindist = min(X)) -> fips_groups
  
  ##
  
  merge(dataset_sim, fips_groups) %>%
    filter(X == mindist) -> dataset_sim
  
  
  ##Find where abs(x) < 100
  
  reg_filter <- abs(dataset_sim$x)<100
  
  fit_reg_20142010 <- tryCatch({
    fit_reg_20142010 <- lm(
      dregistration20142010 ~ 
        z +
        Z +
        Zz +
        x +
        zx +
        Zx +
        Zzx,
      data=dataset_sim[reg_filter,]
    )
  }, warning = function(war) {}, error = function(err) {}, finally = {})
  
  fit_turn_20142010 <- tryCatch({
    fit_turn_20142010 <- lm(
      dturnout20142010 ~ 
        z +
        Z +
        Zz +
        x +
        zx +
        Zx +
        Zzx,
      data=dataset_sim[reg_filter,]
    )
  }, warning = function(war) {}, error = function(err) {}, finally = {})
  
  fit_reg_20162012 <- tryCatch({
    fit_reg_20162012 <- lm(
      dregistration20162012 ~ 
        z +
        Z +
        Zz +
        x +
        zx +
        Zx +
        Zzx,
      data=dataset_sim[reg_filter,]
    )
  }, warning = function(war) {}, error = function(err) {}, finally = {})
  
  fit_turn_20162012 <- tryCatch({
    fit_turn_20162012 <- lm(
      dturnout20162012 ~ 
        z +
        Z +
        Zz +
        x +
        zx +
        Zx +
        Zzx,
      data=dataset_sim[reg_filter,]
    )
  }, warning = function(war) {}, error = function(err) {}, finally = {})

   
  B1 <- as.double(coef(fit_reg_20142010)["Zz"])
  sim_coefficients[i, "B1"] <- ifelse(is_empty(B1),NaN,B1)
  B2 <- as.double(coef(fit_turn_20142010)["Zz"])
  sim_coefficients[i, "B2"] <- ifelse(is_empty(B2),NaN,B2)
  B3 <- as.double(coef(fit_reg_20162012)["Zz"])
  sim_coefficients[i, "B3"] <- ifelse(is_empty(B3),NaN,B3)
  B4 <- as.double(coef(fit_turn_20162012)["Zz"])
  sim_coefficients[i, "B4"] <- ifelse(is_empty(B4),NaN,B4)
}


sim_coefficients <- sim_coefficients[complete.cases(sim_coefficients),]

sim_coefficients %>%
  mutate(b1_cond=abs(B1)>abs(est1z)) %>%
  mutate(b2_cond=abs(B2)>abs(est2z)) %>%
  mutate(b3_cond=abs(B3)>abs(est3z)) %>%
  mutate(b4_cond=abs(B4)>abs(est4z)) -> sim_coefficients

covered1 <- sum(sim_coefficients$b1_cond)/length(sim_coefficients$B1)*100
covered2 <- sum(sim_coefficients$b2_cond)/length(sim_coefficients$B2)*100
covered3 <- sum(sim_coefficients$b3_cond)/length(sim_coefficients$B3)*100
covered4 <- sum(sim_coefficients$b4_cond)/length(sim_coefficients$B4)*100

### Data analysis done

### Create 4 histograms

ggplot(data=sim_coefficients, aes(x=B1, fill=b1_cond, y=..density..)) +
  xlim(-5,5)+
  geom_histogram(color="black", binwidth = 0.25) +
  geom_vline(xintercept = -est1z, linetype="dashed") +
  geom_vline(xintercept = +est1z, linetype="dashed") +
  scale_fill_manual(values = c("white", "grey"))+
  xlab("Estimate")+
  ylab("Density")+
  ggtitle("Registration 2014−2010", subtitle = paste("p =",round(covered1, digits=2),"%")) +
  theme(legend.position = "none")->p1

ggplot(data=sim_coefficients, aes(x=B2, fill=b2_cond, y=..density..)) +
  geom_histogram(color="black", binwidth = 0.25) +
  geom_vline(xintercept = -est2z, linetype="dashed") +
  geom_vline(xintercept = +est2z, linetype="dashed") +
  scale_fill_manual(values = c("white", "grey"))+
  xlim(-5,5) + 
  xlab("Estimate")+
  ylab("Density")+
  ggtitle("Turnout 2014−2010", subtitle = paste("p =",round(covered2, digits=2),"%")) +
  theme(legend.position = "none")->p2

ggplot(data=sim_coefficients, aes(x=B3, fill=b3_cond, y=..density..)) +
  geom_histogram(color="black", binwidth = 0.25) +
  geom_vline(xintercept = -est3z, linetype="dashed") +
  geom_vline(xintercept = +est3z, linetype="dashed") +
  scale_fill_manual(values = c("white", "grey"))+
  xlim(-5,5) + 
  xlab("Estimate")+
  ylab("Density")+
  ggtitle("Registration 2016−2012", subtitle = paste("p =",round(covered3, digits=2),"%")) +
  theme(legend.position = "none")->p3

ggplot(data=sim_coefficients, aes(x=B4, fill=b4_cond, y=..density..)) +
  geom_histogram(color="black", binwidth = 0.25) +
  geom_vline(xintercept = -est4z, linetype="dashed") +
  geom_vline(xintercept = +est4z, linetype="dashed") +
  scale_fill_manual(values = c("white", "grey"))+
  xlim(-5,5) + 
  xlab("Estimate")+
  ylab("Density")+
  ggtitle("Turnout 2016−2012", subtitle = paste("p =",round(covered4, digits=2),"%")) +
  theme(legend.position = "none")->p4

grid.arrange(p1,p2,p3,p4)
